# setwd("~/Dropbox/T&G project survey/Ukraine/replication_JOP")
# libraries
library(tidyverse)
library(cobalt)
library(patchwork)
library(paletteer)
library(MatchIt)
library(estimatr)
library(lmtest)
library(stargazer)
source("func.R")

# ggplot defaults: geoms theme
update_geom_defaults("text", list(family = "Archivo Narrow"))
update_geom_defaults("label", list(family = "Archivo Narrow"))
theme_set(theme_nice())

## Load data
df = read_rds("data/clean_data.rds") %>% filter(post == 1)

# Set median interview date
median_date = quantile(df$date, 0.5, type = 1, na.rm = TRUE)
# Check
# table(df$date > median_date)

## Add placebo treatment variable
df$placebo = ifelse(df$date > median_date, 1, 0)

## Matched datasets

# Variables and formula
mvars = c("edad", "Q14", "ideo", "sexo", "clase_social_r", "educacion_r")
f = as.formula(paste0("placebo ~ ", paste(mvars, collapse = "+")))

# Matching and datasets
match_post = matchit(f, data = df, method = "full", estimand = "ATC")
matched_data_post = match.data(match_post)

# Days since placebo
matched_data_post$days_placebo = abs(matched_data_post$date - median_date)

## Modeling

m_natID_post = lm_robust(Q60 ~ placebo, data = matched_data_post,
  weights = weights, clusters = subclass, se_type = "stata")
m_regID_post = lm_robust(Q61 ~ placebo, data = matched_data_post,
  weights = weights, clusters = subclass, se_type = "stata")
m_natID_post5 = lm_robust(Q60 ~ placebo,
  data = subset(matched_data_post, days_placebo <= 5),
  weights = weights, clusters = subclass, se_type = "stata")
m_regID_post5 = lm_robust(Q61 ~ placebo,
  data = subset(matched_data_post, days_placebo <= 5),
  weights = weights, clusters = subclass, se_type = "stata")
m_natID_post_covs = lm_robust(Q60 ~ placebo +
  edad + Q14 + ideo + sexo + educacion_r + clase_social_r,
  data = matched_data_post, weights = weights, clusters = subclass, se_type = "stata")
m_regID_post_covs = lm_robust(Q61 ~ placebo +
  edad + Q14 + ideo + sexo + educacion_r + clase_social_r,
  data = matched_data_post, weights = weights, clusters = subclass, se_type = "stata")
m_natID_post_nomatch = lm_robust(Q60 ~ placebo, data = df[df$post == 1,],
  se_type = "stata")
m_regID_post_nomatch = lm_robust(Q61 ~ placebo, data = df[df$post == 1,],
  se_type = "stata")

model_names = c("Matched data", "Matched, within 5 days of placebo",
  "Matched data and covariates", "No matching")

placebo_coefs = rbind(
    tidy(m_natID_post, conf.int = T) %>% filter(term == "placebo"),
    tidy(m_regID_post, conf.int = T) %>% filter(term == "placebo"),
    tidy(m_natID_post5, conf.int = T) %>% filter(term == "placebo"),
    tidy(m_regID_post5, conf.int = T) %>% filter(term == "placebo"),
    tidy(m_natID_post_covs, conf.int = T) %>% filter(term == "placebo"),
    tidy(m_regID_post_covs, conf.int = T) %>% filter(term == "placebo"),
    tidy(m_natID_post_nomatch, conf.int = T) %>% filter(term == "placebo"),
    tidy(m_regID_post_nomatch, conf.int = T) %>% filter(term == "placebo")) %>%
  mutate(outcome2 = recode(outcome,
    "Q60" = "National\nidentification", "Q61" = "Regional\nidentification")) %>%
  mutate(model = rep(model_names, each = 2)) %>%
  mutate(
    conf.high90 = estimate + std.error * qnorm(0.95),
    conf.low90 = estimate - std.error * qnorm(0.95))

placebo_post = ggplot(placebo_coefs, aes(x = estimate, y = outcome2)) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high),
    fatten = 1, size = 2, shape = 21, fill = "white") +
  geom_pointrange(aes(xmin = conf.low90, xmax = conf.high90),
    fatten = 1, size = 3, shape = 21, fill = "white") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  labs(y = "", subtitle = "Bars indicate 90% and 95% CIs.",
    x = "Coefficient estimate of placebo (interview after median date)") +
  theme(axis.text.y = element_text(size = 12)) +
  facet_wrap(~ model, ncol = length(model_names))
ggsave("figures/placebo_test.pdf", width = 8.5, height = 3, device = cairo_pdf)
